********************************************************************************
*	PROJECT: Childhood confidence and long-term outcomes (PSID)
*	PURPOSE: Run main regressions 
*	PUBLISHED: August 2022
*	CONTACT: Hannah Ruebeck, hruebeck@mit.edu
********************************************************************************
* ------------------------------------------------------------------------------
* Session settings: start from an empty session, raise the variable limit,
* disable output paging, and allow -pause- to stop execution for debugging.
* ------------------------------------------------------------------------------
clear all
set maxvar 10000
set more off
pause on


* ------------------------------------------------------------------------------
* Directory layout. Edit the root folder to match the local machine; the code,
* clean-data and table-output folders are all derived from it.
* ------------------------------------------------------------------------------
local path "/Users/XXXXX/Downloads/replication materials"

local DO    "`path'/code"
local CLEAN "`path'/clean"
local OUT   "`path'/out/tables"


********************************************************************************
* Set up controls etc. by running the shared set-up script, then make sure no
* dataset is left in memory before the table programs load their own data.

do "`DO'/set_up_regressions.do"
clear

********************************************************************************
// Programs

* get_pval
* --------
* Two-sided p-value and LaTeX significance stars for one coefficient.
*
* Options
*   b(string)   coefficient estimate (any numeric literal)
*   se(string)  standard error; surrounding parentheses, if any, are stripped
*   df(string)  degrees of freedom; selects the Student t reference distribution
*   normal      selects the standard normal reference distribution
* Exactly one of df() and normal must be supplied; otherwise the program
* prints an error message and exits with return code 198.
*
* Returns
*   r(star)  "" when p > .1 (or p is missing), one star for .05 < p <= .1,
*            two stars for .01 < p <= .05, three stars for p <= .01, each
*            wrapped as a LaTeX math superscript
*   r(p)     the two-sided p-value (missing when b/se is missing)
program get_pval, rclass
    syntax, b(string) se(string) [df(string) normal]
	
	* validate the choice of reference distribution
	if "`df'"!="" & "`normal'"!="" {
	    di as error "df() is only specified for a t-distribution and cannot be jointly specified with normal"
		exit 198
	}
	if "`df'"=="" & "`normal'"=="" {
		di as error "df() or normal must be specified"
		exit 198
	}
	
	* callers may pass the standard error already wrapped in parentheses
    local se = subinstr("`se'", "(", "", .)
    local se = subinstr("`se'", ")", "", .)
	
	* two-sided p-value under the requested distribution
    if "`df'"!="" {
		local p = 2*ttail(`df',abs(`b'/`se'))
    }
    else {
		local p = 2*(1-normal(abs(`b'/`se')))
    }
	
	* star ladder, computed once for both distributions; a missing p-value
	* fails every comparison below and therefore yields no stars
	local star = ""
	if `p' <=.1   local star = "$^{*}$"
	if `p' <=.05  local star = "$^{**}$"
	if `p' <=.01  local star = "$^{***}$"
	
    return scalar p = `p'
    return local star = "`star'"

end 



* write_ols_reg_table
* -------------------
* Runs the main OLS specifications for a list of outcomes and writes one LaTeX
* table (tabular body plus a notes block) to
*     <outfolder>/table_main_<outgroup>_w<weightnum>_<subj>.tex
*
* For every outcome two columns are produced (control sets 1 and 2). In each
* column two regressions are run on observations with confidence_sample==1:
*   Panel A: outcome on the binary over- and under-confidence indicators,
*            plus a test that the over-confidence coefficient equals -1 times
*            the under-confidence coefficient;
*   Panel B: outcome on the standardized continuous confidence measure.
* The outcomes pc1 and sat are created as all-missing placeholders (restricted
* data); their cells are filled with "---" and no regression is run.
*
* Options
*   varlist(string)           outcome variables, in column order
*   outgroup(string)          tag used in the output file name
*   outfolder(string)         folder the .tex file is written to
*   datafolder(string)        folder holding CDS_TAS_PSID_analysis_<datatype>.dta
*   subj(string)              subject tag inside the regressor names; the
*                             Panel A row letter is read from character 12 of
*                             the regressor name, which lines up for the
*                             four-letter tags math and read
*   datatype(string)          dataset suffix; "panel" adds a sentence about
*                             year fixed effects to the notes
*   description(string)       phrase inserted into the first notes sentence
*   weightnum(string)         0-3; picks weight_confsamp<weightnum> and the
*                             matching weight description for the notes
*   extracontrols(string)     optional additional regressors
*   extracontrolsnote(string) optional sentence appended to the notes
*
* Relies on global macros named spec1_w<weightnum>, spec2_w<weightnum> and
* cluster, which are not defined in this program (presumably created by the
* set-up do-file; confirm there), and on the program get_pval.
program write_ols_reg_table
    syntax, varlist(string) outgroup(string) outfolder(string) datafolder(string) subj(string) ///
			datatype(string) description(string) weightnum(string) ///
			[extracontrols(string) extracontrolsnote(string)] 

	
	// weight variable and the suffix shared by weight-specific names
	local weightvar weight_confsamp`weightnum'
	local w "_w`weightnum'"
	
	// sentence about the weighting scheme for the table notes
	if `weightnum'==0 local weightdesc ""
	if `weightnum'==1 local weightdesc "Observations are weighted by the inverse probability of their inclusion in the sample in 1997 given the sampling design of the CDS. All controls that are indices are normalized relative to the weighted distribution."
	if `weightnum'==2 local weightdesc "Observations are weighted by the inverse probability of their inclusion in the sample in 1997 given the sampling design of the CDS, adjusted to account for attrition from the analysis sample. All controls that are indices are normalized relative to the weighted distribution."
	if `weightnum'==3 local weightdesc "Observations are weighted so that our sample matches population shares in quintiles of income, in race categories, and in deciles of nationally-normed WJ-R math percentile scores. All controls that are indices are normalized relative to the weighted distribution."

	if "`datatype'"=="panel" {
	    local paneldesc "Basic controls also include year fixed effects when the outcome is observed in a panel."
	}
	
	use "`datafolder'/CDS_TAS_PSID_analysis_`datatype'.dta", clear
	
	// restricted data, fill in with blanks
	gen pc1 = .
	label variable pc1 "College quality index"
	gen sat = . 
	label variable sat "College's 75th pctile math SAT score"
	
	// Build the header pieces. Each outcome occupies two result columns plus
	// one empty spacer column, so LaTeX column indices advance by 3 and the
	// printed column numbers advance by 2.
	local numvars = 0
	local c1 = 2
	local n1 = 1
	local Cs ""
	local multis "Dependent variable: & "
	local clines ""
	local Ns "& "
	local basic "Basic controls: &"
	local added "Added background controls: &"
	foreach var in `varlist' {
		local label : variable label `var'
		local multis "`multis' \multicolumn{2}{c}{`label'} &&"

		local c2 = `c1'+1
		local Cs = "`Cs'cc c "
		local clines "`clines' \cline{`c1'-`c2'}"
		
		local n2 = `n1'+1
		local Ns "`Ns' (`n1') & (`n2') &&"
		
		local basic "`basic' \checkmark & \checkmark &&"
		local added "`added' 			 & \checkmark &&"

		local c1 = `c1'+3
		local n1 = `n1'+2
		
		local numvars = `numvars'+1

	}
	
	// drop the trailing "&&" that the loop left on each header row
	foreach l in multis Ns basic added {
		local len = length("``l''")-2
		local `l' = substr("``l''", 1, `len')
	}
	local numcols = `numvars'*2

	// An earlier call that aborted mid-table leaves the handle open, which
	// would make the next -file open- fail; release it first.
	capture file close fh
    file open fh using "`outfolder'/table_main_`outgroup'`w'_`subj'.tex", write replace
	
	file write fh "\begin{tabular}{l `Cs'}" _n ///
	"\hline \hline" _n ///
	" `multis' \\" _n ///
	"`clines' " _n ///
	" `Ns' \\ " _n ///
	"`clines' " _n 
			
	// col indexes result columns 1..numcols across outcomes and control sets
	local col 1
	foreach var in `varlist' {
			
		if "`var'"=="earnings_self" local format "%7.0f"
		else local format "%4.3f"
		
		* run regressions
					
		foreach s in 1 2 {
								
			
			* controls restrictions
			local controls ${spec`s'`w'}
				
			// ---- regression 1: binary over-/under-confidence indicators ----
			
			* main indepvars
			local confindepvarlist c_fst_`subj'_overcnf_orig c_fst_`subj'_undrcnf_orig

			if !inlist("`var'", "pc1", "sat"){
			
				reg `var' `confindepvarlist' `controls' `extracontrols' [pweight = `weightvar'] if confidence_sample==1, cluster( $cluster )
				local N`col' = `e(N)'
				local df`col'_1 = `e(df_r)'
				
				* save coefficients
				foreach confindepvar in `confindepvarlist' {
					// character 12 of the regressor name is o (over) or u (under)
					local v = substr("`confindepvar'", 12, 1)
					local b`col'_`v' = string(_b[`confindepvar'], "`format'")
					local se`col'_`v' = string(_se[`confindepvar'], "`format'")
					// stars come from the unrounded estimates; the display
					// strings above can round to zero and lose significance
					get_pval, b(`=_b[`confindepvar']') se(`=_se[`confindepvar']') df(`df`col'_1')
					local star`col'_`v' "`r(star)'"
					local bstar`col'_`v' "`b`col'_`v''`star`col'_`v''"
					local se`col'_`v' = "(" + "`se`col'_`v''" + ")"
				}
				test _b[c_fst_`subj'_overcnf_orig]==-1*_b[c_fst_`subj'_undrcnf_orig] 
				local p`col' = string(`r(p)', "%4.3f")

			}
			else {
				foreach confindepvar in `confindepvarlist' {
					local v = substr("`confindepvar'", 12, 1)
					local bstar`col'_`v' "---"
					local se`col'_`v' ""
				}
				local p`col' "---"
				local N`col' "---"
			}
			
			
	
			// ---- regression 2: standardized continuous confidence measure ----
			
			* main indepvars
			local confindepvarlist c_fst_z_rsd_u_`subj'cnf_ratesk7_w`weightnum'
			
			if !inlist("`var'", "pc1", "sat"){

				reg `var' `confindepvarlist' `controls' `extracontrols' [pweight = `weightvar'] if confidence_sample==1, cluster( $cluster )
				
				// both panels must use the same number of observations
				assert `N`col'' == `e(N)'
				local df`col'_2 = `e(df_r)'
				gen esample = e(sample)
				
				* save coefficients
				foreach confindepvar in `confindepvarlist' {
					local v  c
					local b`col'_`v' = string(_b[`confindepvar'], "`format'")
					local se`col'_`v' = string(_se[`confindepvar'], "`format'")
					// unrounded estimates, for the same reason as in regression 1
					get_pval, b(`=_b[`confindepvar']') se(`=_se[`confindepvar']') df(`df`col'_2')
					local star`col'_`v' "`r(star)'"
					local bstar`col'_`v' "`b`col'_`v''`star`col'_`v''"
					local se`col'_`v' = "(" + "`se`col'_`v''" + ")"
				}
				
				
				// sample mean of the outcome over the estimation sample
				assert e(sample)==esample
				drop esample
				sum `var' [aweight = `weightvar'] if e(sample)==1 
				local mean`col' = string(`r(mean)', "`format'")


			}
			else {
				local v c
				local bstar`col'_`v' "---"
				local se`col'_`v' ""
			}
			
			local col = `col'+1  
		}
	}
			
	* write rows to table
	// o and u rows form Panel A (with N and the p-value after the u rows);
	// the c rows form Panel B (with N)
	foreach v in o u c {
		
		if "`v'"=="o" {
			file write fh "\\ \multicolumn{8}{l}{\emph{Panel A: Independent variables are binary measures of over- and under-confidence}} \\ \\" _n
			local label "Over-confidence"
			local statlist bstar se
		}
		if "`v'"=="c" {
			file write fh "\\ \multicolumn{8}{l}{\emph{Panel B: Independent variable is degrees of over- and under-confidence in standard deviation units}} \\ \\" _n
			local label "Confidence"
			local statlist bstar se N 
		}
		if "`v'"=="u" {
			local label "Under-confidence"
			local statlist bstar se N p 
		}
		
		foreach stat in `statlist' {
			if "`stat'"=="bstar" file write fh "\hspace{10pt} `label' & "
			if "`stat'"=="se" file write fh "  & "
			if "`stat'"=="N" file write fh "\hspace{20pt} N &"
			if "`stat'"=="p" file write fh "\hspace{20pt} OC = -1*UC?  \emph{p-value}:  &"
			
			forval c = 1/`numcols' {
				// coefficient rows are stored per regressor letter; N and p per column
				if inlist("`stat'", "bstar", "se") file write fh " ``stat'`c'_`v'' "
				else			 file write fh " ``stat'`c'' "
				// separators: odd column, then even column plus spacer, then end of row
				if mod(`c', 2)==0 & `c'!=`numcols' file write fh " && "
				else if mod(`c', 2)!=0  file write fh " & "
				else file write fh "\\" _n
			}
			if "`v'"=="u" & "`stat'"=="N" file write fh "\\" _n

		}
	}
	
	// the sample mean is stored under the odd column index of each outcome
	local n =`numcols'-1
	file write fh "\\  Sample mean of dep. var. & "
	forval c = 1(2)`n' {
		file write fh "`mean`c''"
		if `c'!=`n' file write fh " &&& "
		else file write fh " & \\ \\" _n
	}
		
	* checkmarks
	file write fh "`basic'  \\" _n
	file write fh "`added'  \\" _n

	file write fh "\hline \hline " _n 
	file write fh "\end{tabular}" _n
	file write fh "\begin{tabular}{ p{7in} }" _n 
	file write fh "\footnotesize" _n 
	#delimit ;
    file write fh "Notes:  This table regresses `description' outcomes on childhood biased beliefs with various controls. 
	Biased beliefs are measured in the earliest observed wave in the CDS with non-missing test scores and self-assessed ability. 
	In Panel A, the outcome is regressed on an indicator for over-confidence, an indicator for under-confidence and our basic set of controls (in odd-numbered columns) and our extended set of controls (in even-numbered columns). The p-value listed tests whether the coefficient on the over-confidence indicator is equal to -1 times the coefficient on the under-confidence indicator. In Panel B, the outcome is regressed on our more continuous measure of biased beliefs which has been standardized to have mean zero and standard deviation one in our sample and the same sets of controls. 
	All controls are the same as described in Table \ref{tab:persist_conf}, minus the controls for adolescent test score deciles. 
	`paneldesc' `extracontrolsnote'  
	`weightdesc' Standard errors are clustered at the family level and included in parentheses below each 
	estimate. *, **, and *** indicate significance at the 10, 5, and 1 percent level, respectively. " _n ;
	#delimit cr
    file write fh "\end{tabular}" _n 
    file close fh 	
end



********************************************************************************
// Run regressions and write tables

* Settings shared by every table below: unweighted sample (weight set 0) and
* childhood confidence measured in math.
local weightnum 0
local w "_w`weightnum'"
local subj math


* Table 3: educational achievement and attainment
write_ols_reg_table, ///
	varlist(sec_chld_math_ap_pctile sec_chld_read_total_pctile ///
		    ever_atleast_grad_hs_self ever_atleast_grad_bach_self) ///
	datatype(1year) ///
	outgroup(education1) ///
	description("educational achievement and attainment") ///
	datafolder("`CLEAN'") ///
	outfolder("`OUT'") ///
	weightnum("`weightnum'") ///
	subj(`subj')

* Table 4: college quality (restricted data), stem major, and graduate degree
write_ols_reg_table, ///
	varlist( pc1 sat stemmajor ever_atleast_grad_grad_self  ) ///
	datatype(1year) ///
	outgroup(education2) ///
	description("college outcomes") ///
	datafolder("`CLEAN'") ///
	outfolder("`OUT'") ///
	weightnum("`weightnum'") ///
	subj(`subj')

* Table 5: labor market outcomes
write_ols_reg_table, ///
	varlist(occ_stem_self_over25 occ_othhiskll_self_over25 ln_earnings_self_over25 unemp_self_over25   ) ///
	datatype(panel) ///
	outgroup(employment) ///
	description("employment") ///
	datafolder("`CLEAN'") ///
	outfolder("`OUT'") ///
	weightnum("`weightnum'") ///
	subj(`subj')

* Table 6: adult confidence (adds the extra adolescent controls and a note
* describing them)
write_ols_reg_table, ///
	varlist(ta_math_conf`w' ta_reading_conf`w' ta_academic_conf`w' ta_career_conf`w' ta_general_conf`w' ) ///
	datatype(panel) ///
	outgroup(adult_confidence) ///
	description("young adult confidence") ///
	datafolder("`CLEAN'") ///
	outfolder("`OUT'") ///
	weightnum("`weightnum'") ///
	subj(`subj') ///
	extracontrols(${extracontrols_adultconf_w`weightnum'}) ///
	extracontrolsnote("In this table, we add controls for adolescent test score deciles in math and reading, as well as adolescent general confidence and digit span scores in all specifications.")
	




